Homework 1

Author

Allison Louie

#1

library(data.table)
# Read the 2002 and 2022 daily PM2.5 CSV exports into data.tables.
data_2002 <- fread(file = "2002_data.csv")
data_2022 <- fread(file = "2022_data.csv")
dim(data_2002)
[1] 15976    20
head(data_2002)
         Date Source  Site ID POC Daily Mean PM2.5 Concentration    UNITS
1: 01/05/2002    AQS 60010007   1                           25.1 ug/m3 LC
2: 01/06/2002    AQS 60010007   1                           31.6 ug/m3 LC
3: 01/08/2002    AQS 60010007   1                           21.4 ug/m3 LC
4: 01/11/2002    AQS 60010007   1                           25.9 ug/m3 LC
5: 01/14/2002    AQS 60010007   1                           34.5 ug/m3 LC
6: 01/17/2002    AQS 60010007   1                           41.0 ug/m3 LC
   DAILY_AQI_VALUE Site Name DAILY_OBS_COUNT PERCENT_COMPLETE
1:              78 Livermore               1              100
2:              92 Livermore               1              100
3:              71 Livermore               1              100
4:              80 Livermore               1              100
5:              98 Livermore               1              100
6:             115 Livermore               1              100
   AQS_PARAMETER_CODE       AQS_PARAMETER_DESC CBSA_CODE
1:              88101 PM2.5 - Local Conditions     41860
2:              88101 PM2.5 - Local Conditions     41860
3:              88101 PM2.5 - Local Conditions     41860
4:              88101 PM2.5 - Local Conditions     41860
5:              88101 PM2.5 - Local Conditions     41860
6:              88101 PM2.5 - Local Conditions     41860
                           CBSA_NAME STATE_CODE      STATE COUNTY_CODE  COUNTY
1: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
2: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
3: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
4: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
5: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
6: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
   SITE_LATITUDE SITE_LONGITUDE
1:      37.68753      -121.7842
2:      37.68753      -121.7842
3:      37.68753      -121.7842
4:      37.68753      -121.7842
5:      37.68753      -121.7842
6:      37.68753      -121.7842
tail(data_2002)
         Date Source  Site ID POC Daily Mean PM2.5 Concentration    UNITS
1: 12/10/2002    AQS 61131003   1                             15 ug/m3 LC
2: 12/13/2002    AQS 61131003   1                             15 ug/m3 LC
3: 12/22/2002    AQS 61131003   1                              1 ug/m3 LC
4: 12/25/2002    AQS 61131003   1                             23 ug/m3 LC
5: 12/28/2002    AQS 61131003   1                              5 ug/m3 LC
6: 12/31/2002    AQS 61131003   1                              6 ug/m3 LC
   DAILY_AQI_VALUE            Site Name DAILY_OBS_COUNT PERCENT_COMPLETE
1:              57 Woodland-Gibson Road               1              100
2:              57 Woodland-Gibson Road               1              100
3:               4 Woodland-Gibson Road               1              100
4:              74 Woodland-Gibson Road               1              100
5:              21 Woodland-Gibson Road               1              100
6:              25 Woodland-Gibson Road               1              100
   AQS_PARAMETER_CODE       AQS_PARAMETER_DESC CBSA_CODE
1:              88101 PM2.5 - Local Conditions     40900
2:              88101 PM2.5 - Local Conditions     40900
3:              88101 PM2.5 - Local Conditions     40900
4:              88101 PM2.5 - Local Conditions     40900
5:              88101 PM2.5 - Local Conditions     40900
6:              88101 PM2.5 - Local Conditions     40900
                                 CBSA_NAME STATE_CODE      STATE COUNTY_CODE
1: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
2: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
3: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
4: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
5: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
6: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
   COUNTY SITE_LATITUDE SITE_LONGITUDE
1:   Yolo      38.66121      -121.7327
2:   Yolo      38.66121      -121.7327
3:   Yolo      38.66121      -121.7327
4:   Yolo      38.66121      -121.7327
5:   Yolo      38.66121      -121.7327
6:   Yolo      38.66121      -121.7327
names(data_2002)
 [1] "Date"                           "Source"                        
 [3] "Site ID"                        "POC"                           
 [5] "Daily Mean PM2.5 Concentration" "UNITS"                         
 [7] "DAILY_AQI_VALUE"                "Site Name"                     
 [9] "DAILY_OBS_COUNT"                "PERCENT_COMPLETE"              
[11] "AQS_PARAMETER_CODE"             "AQS_PARAMETER_DESC"            
[13] "CBSA_CODE"                      "CBSA_NAME"                     
[15] "STATE_CODE"                     "STATE"                         
[17] "COUNTY_CODE"                    "COUNTY"                        
[19] "SITE_LATITUDE"                  "SITE_LONGITUDE"                
str(data_2002)
Classes 'data.table' and 'data.frame':  15976 obs. of  20 variables:
 $ Date                          : chr  "01/05/2002" "01/06/2002" "01/08/2002" "01/11/2002" ...
 $ Source                        : chr  "AQS" "AQS" "AQS" "AQS" ...
 $ Site ID                       : int  60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 ...
 $ POC                           : int  1 1 1 1 1 1 1 1 1 1 ...
 $ Daily Mean PM2.5 Concentration: num  25.1 31.6 21.4 25.9 34.5 41 29.3 15 18.8 37.9 ...
 $ UNITS                         : chr  "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" ...
 $ DAILY_AQI_VALUE               : int  78 92 71 80 98 115 87 57 65 107 ...
 $ Site Name                     : chr  "Livermore" "Livermore" "Livermore" "Livermore" ...
 $ DAILY_OBS_COUNT               : int  1 1 1 1 1 1 1 1 1 1 ...
 $ PERCENT_COMPLETE              : num  100 100 100 100 100 100 100 100 100 100 ...
 $ AQS_PARAMETER_CODE            : int  88101 88101 88101 88101 88101 88101 88101 88101 88101 88101 ...
 $ AQS_PARAMETER_DESC            : chr  "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" ...
 $ CBSA_CODE                     : int  41860 41860 41860 41860 41860 41860 41860 41860 41860 41860 ...
 $ CBSA_NAME                     : chr  "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" ...
 $ STATE_CODE                    : int  6 6 6 6 6 6 6 6 6 6 ...
 $ STATE                         : chr  "California" "California" "California" "California" ...
 $ COUNTY_CODE                   : int  1 1 1 1 1 1 1 1 1 1 ...
 $ COUNTY                        : chr  "Alameda" "Alameda" "Alameda" "Alameda" ...
 $ SITE_LATITUDE                 : num  37.7 37.7 37.7 37.7 37.7 ...
 $ SITE_LONGITUDE                : num  -122 -122 -122 -122 -122 ...
 - attr(*, ".internal.selfref")=<externalptr> 
summary(data_2002)
     Date              Source             Site ID              POC       
 Length:15976       Length:15976       Min.   :60010007   Min.   :1.000  
 Class :character   Class :character   1st Qu.:60290014   1st Qu.:1.000  
 Mode  :character   Mode  :character   Median :60590007   Median :1.000  
                                       Mean   :60549600   Mean   :1.581  
                                       3rd Qu.:60731002   3rd Qu.:1.000  
                                       Max.   :61131003   Max.   :6.000  
                                                                         
 Daily Mean PM2.5 Concentration    UNITS           DAILY_AQI_VALUE 
 Min.   :  0.00                 Length:15976       Min.   :  0.00  
 1st Qu.:  7.00                 Class :character   1st Qu.: 29.00  
 Median : 12.00                 Mode  :character   Median : 50.00  
 Mean   : 16.12                                    Mean   : 53.68  
 3rd Qu.: 20.50                                    3rd Qu.: 69.00  
 Max.   :104.30                                    Max.   :176.00  
                                                                   
  Site Name         DAILY_OBS_COUNT PERCENT_COMPLETE AQS_PARAMETER_CODE
 Length:15976       Min.   :1       Min.   :100      Min.   :88101     
 Class :character   1st Qu.:1       1st Qu.:100      1st Qu.:88101     
 Mode  :character   Median :1       Median :100      Median :88101     
                    Mean   :1       Mean   :100      Mean   :88215     
                    3rd Qu.:1       3rd Qu.:100      3rd Qu.:88502     
                    Max.   :1       Max.   :100      Max.   :88502     
                                                                       
 AQS_PARAMETER_DESC   CBSA_CODE      CBSA_NAME           STATE_CODE
 Length:15976       Min.   :12540   Length:15976       Min.   :6   
 Class :character   1st Qu.:23420   Class :character   1st Qu.:6   
 Mode  :character   Median :40140   Mode  :character   Median :6   
                    Mean   :33270                      Mean   :6   
                    3rd Qu.:41740                      3rd Qu.:6   
                    Max.   :49700                      Max.   :6   
                    NA's   :929                                    
    STATE            COUNTY_CODE        COUNTY          SITE_LATITUDE  
 Length:15976       Min.   :  1.00   Length:15976       Min.   :32.63  
 Class :character   1st Qu.: 29.00   Class :character   1st Qu.:34.07  
 Mode  :character   Median : 59.00   Mode  :character   Median :35.36  
                    Mean   : 54.78                      Mean   :36.00  
                    3rd Qu.: 73.00                      3rd Qu.:37.77  
                    Max.   :113.00                      Max.   :41.71  
                                                                       
 SITE_LONGITUDE  
 Min.   :-124.2  
 1st Qu.:-121.4  
 Median :-119.1  
 Mean   :-119.4  
 3rd Qu.:-117.9  
 Max.   :-115.5  
                 
dim(data_2022)
[1] 57595    20
head(data_2022)
         Date Source  Site ID POC Daily Mean PM2.5 Concentration    UNITS
1: 01/01/2022    AQS 60010007   3                           12.7 ug/m3 LC
2: 01/02/2022    AQS 60010007   3                           13.9 ug/m3 LC
3: 01/03/2022    AQS 60010007   3                            7.1 ug/m3 LC
4: 01/04/2022    AQS 60010007   3                            3.7 ug/m3 LC
5: 01/05/2022    AQS 60010007   3                            4.2 ug/m3 LC
6: 01/06/2022    AQS 60010007   3                            3.8 ug/m3 LC
   DAILY_AQI_VALUE Site Name DAILY_OBS_COUNT PERCENT_COMPLETE
1:              52 Livermore               1              100
2:              55 Livermore               1              100
3:              30 Livermore               1              100
4:              15 Livermore               1              100
5:              18 Livermore               1              100
6:              16 Livermore               1              100
   AQS_PARAMETER_CODE       AQS_PARAMETER_DESC CBSA_CODE
1:              88101 PM2.5 - Local Conditions     41860
2:              88101 PM2.5 - Local Conditions     41860
3:              88101 PM2.5 - Local Conditions     41860
4:              88101 PM2.5 - Local Conditions     41860
5:              88101 PM2.5 - Local Conditions     41860
6:              88101 PM2.5 - Local Conditions     41860
                           CBSA_NAME STATE_CODE      STATE COUNTY_CODE  COUNTY
1: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
2: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
3: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
4: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
5: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
6: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
   SITE_LATITUDE SITE_LONGITUDE
1:      37.68753      -121.7842
2:      37.68753      -121.7842
3:      37.68753      -121.7842
4:      37.68753      -121.7842
5:      37.68753      -121.7842
6:      37.68753      -121.7842
tail(data_2022)
         Date Source  Site ID POC Daily Mean PM2.5 Concentration    UNITS
1: 12/01/2022    AQS 61131003   1                            3.4 ug/m3 LC
2: 12/07/2022    AQS 61131003   1                            3.8 ug/m3 LC
3: 12/13/2022    AQS 61131003   1                            6.0 ug/m3 LC
4: 12/19/2022    AQS 61131003   1                           34.8 ug/m3 LC
5: 12/25/2022    AQS 61131003   1                           23.2 ug/m3 LC
6: 12/31/2022    AQS 61131003   1                            1.0 ug/m3 LC
   DAILY_AQI_VALUE            Site Name DAILY_OBS_COUNT PERCENT_COMPLETE
1:              14 Woodland-Gibson Road               1              100
2:              16 Woodland-Gibson Road               1              100
3:              25 Woodland-Gibson Road               1              100
4:              99 Woodland-Gibson Road               1              100
5:              74 Woodland-Gibson Road               1              100
6:               4 Woodland-Gibson Road               1              100
   AQS_PARAMETER_CODE       AQS_PARAMETER_DESC CBSA_CODE
1:              88101 PM2.5 - Local Conditions     40900
2:              88101 PM2.5 - Local Conditions     40900
3:              88101 PM2.5 - Local Conditions     40900
4:              88101 PM2.5 - Local Conditions     40900
5:              88101 PM2.5 - Local Conditions     40900
6:              88101 PM2.5 - Local Conditions     40900
                                 CBSA_NAME STATE_CODE      STATE COUNTY_CODE
1: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
2: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
3: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
4: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
5: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
6: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
   COUNTY SITE_LATITUDE SITE_LONGITUDE
1:   Yolo      38.66121      -121.7327
2:   Yolo      38.66121      -121.7327
3:   Yolo      38.66121      -121.7327
4:   Yolo      38.66121      -121.7327
5:   Yolo      38.66121      -121.7327
6:   Yolo      38.66121      -121.7327
str(data_2022)
Classes 'data.table' and 'data.frame':  57595 obs. of  20 variables:
 $ Date                          : chr  "01/01/2022" "01/02/2022" "01/03/2022" "01/04/2022" ...
 $ Source                        : chr  "AQS" "AQS" "AQS" "AQS" ...
 $ Site ID                       : int  60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 ...
 $ POC                           : int  3 3 3 3 3 3 3 3 3 3 ...
 $ Daily Mean PM2.5 Concentration: num  12.7 13.9 7.1 3.7 4.2 3.8 2.3 6.9 13.6 11.2 ...
 $ UNITS                         : chr  "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" ...
 $ DAILY_AQI_VALUE               : int  52 55 30 15 18 16 10 29 54 47 ...
 $ Site Name                     : chr  "Livermore" "Livermore" "Livermore" "Livermore" ...
 $ DAILY_OBS_COUNT               : int  1 1 1 1 1 1 1 1 1 1 ...
 $ PERCENT_COMPLETE              : num  100 100 100 100 100 100 100 100 100 100 ...
 $ AQS_PARAMETER_CODE            : int  88101 88101 88101 88101 88101 88101 88101 88101 88101 88101 ...
 $ AQS_PARAMETER_DESC            : chr  "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" ...
 $ CBSA_CODE                     : int  41860 41860 41860 41860 41860 41860 41860 41860 41860 41860 ...
 $ CBSA_NAME                     : chr  "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" ...
 $ STATE_CODE                    : int  6 6 6 6 6 6 6 6 6 6 ...
 $ STATE                         : chr  "California" "California" "California" "California" ...
 $ COUNTY_CODE                   : int  1 1 1 1 1 1 1 1 1 1 ...
 $ COUNTY                        : chr  "Alameda" "Alameda" "Alameda" "Alameda" ...
 $ SITE_LATITUDE                 : num  37.7 37.7 37.7 37.7 37.7 ...
 $ SITE_LONGITUDE                : num  -122 -122 -122 -122 -122 ...
 - attr(*, ".internal.selfref")=<externalptr> 
summary(data_2022)
     Date              Source             Site ID              POC        
 Length:57595       Length:57595       Min.   :60010007   Min.   : 1.000  
 Class :character   Class :character   1st Qu.:60311004   1st Qu.: 1.000  
 Mode  :character   Mode  :character   Median :60631007   Median : 3.000  
                                       Mean   :60571891   Mean   : 2.535  
                                       3rd Qu.:60771003   3rd Qu.: 3.000  
                                       Max.   :61131003   Max.   :21.000  
                                                                          
 Daily Mean PM2.5 Concentration    UNITS           DAILY_AQI_VALUE 
 Min.   : -2.200                Length:57595       Min.   :  0.00  
 1st Qu.:  4.200                Class :character   1st Qu.: 18.00  
 Median :  7.000                Mode  :character   Median : 29.00  
 Mean   :  8.583                                   Mean   : 33.01  
 3rd Qu.: 10.900                                   3rd Qu.: 45.00  
 Max.   :302.500                                   Max.   :353.00  
                                                                   
  Site Name         DAILY_OBS_COUNT PERCENT_COMPLETE AQS_PARAMETER_CODE
 Length:57595       Min.   :1       Min.   :100      Min.   :88101     
 Class :character   1st Qu.:1       1st Qu.:100      1st Qu.:88101     
 Mode  :character   Median :1       Median :100      Median :88101     
                    Mean   :1       Mean   :100      Mean   :88195     
                    3rd Qu.:1       3rd Qu.:100      3rd Qu.:88101     
                    Max.   :1       Max.   :100      Max.   :88502     
                                                                       
 AQS_PARAMETER_DESC   CBSA_CODE      CBSA_NAME           STATE_CODE
 Length:57595       Min.   :12540   Length:57595       Min.   :6   
 Class :character   1st Qu.:31080   Class :character   1st Qu.:6   
 Mode  :character   Median :40140   Mode  :character   Median :6   
                    Mean   :35447                      Mean   :6   
                    3rd Qu.:41860                      3rd Qu.:6   
                    Max.   :49700                      Max.   :6   
                    NA's   :4725                                   
    STATE            COUNTY_CODE        COUNTY          SITE_LATITUDE  
 Length:57595       Min.   :  1.00   Length:57595       Min.   :32.58  
 Class :character   1st Qu.: 31.00   Class :character   1st Qu.:34.14  
 Mode  :character   Median : 63.00   Mode  :character   Median :36.60  
                    Mean   : 57.04                      Mean   :36.37  
                    3rd Qu.: 77.00                      3rd Qu.:38.10  
                    Max.   :113.00                      Max.   :41.76  
                                                                       
 SITE_LONGITUDE  
 Min.   :-124.2  
 1st Qu.:-121.5  
 Median :-119.8  
 Mean   :-119.7  
 3rd Qu.:-118.1  
 Max.   :-115.5  
                 

In summary, the data sets from 2002 and 2022 contain the same 20 variables. Since we are looking at air pollution through daily PM2.5 concentrations, the main difference is that 2022 has far more observations than 2002 (57,595 rows versus 15,976), which appears to reflect a change in how much data was collected.

#2

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:data.table':

    between, first, last
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(lubridate)

Attaching package: 'lubridate'
The following objects are masked from 'package:data.table':

    hour, isoweek, mday, minute, month, quarter, second, wday, week,
    yday, year
The following objects are masked from 'package:base':

    date, intersect, setdiff, union
# Stack both years, then add a parsed Date column and its calendar year.
comb_airpoll <- rbind(data_2002, data_2022)
parsed_dates <- as.Date(comb_airpoll[["Date"]], format = "%m/%d/%Y")
comb_airpoll[, `:=`(newdate = parsed_dates, year = year(parsed_dates))]

#3

library(leaflet)
library(leaflet.providers)
# One row per monitoring location and year. Selecting the columns with
# `by = CBSA_CODE` alone keeps every daily reading, so each site's coordinates
# are repeated once per observation; unique() collapses them to distinct sites.
comb_airpoll2 <- unique(
  comb_airpoll[
    !is.na(year),
    .(CBSA_CODE, year, lat = SITE_LATITUDE, lon = SITE_LONGITUDE)
  ]
)

# Numeric blue-to-red colour scale spanning the years present in the data.
year.pal <- colorNumeric(c("blue", "red"), domain = comb_airpoll$year)
year.pal
function (x) 
{
    if (length(x) == 0 || all(is.na(x))) {
        return(pf(x))
    }
    if (is.null(rng)) 
        rng <- range(x, na.rm = TRUE)
    rescaled <- scales::rescale(x, from = rng)
    if (any(rescaled < 0 | rescaled > 1, na.rm = TRUE)) 
        warning("Some values were outside the color scale and will be treated as NA")
    if (reverse) {
        rescaled <- 1 - rescaled
    }
    pf(rescaled)
}
<bytecode: 0x7f8c4f4d7f18>
<environment: 0x7f8c4f4d58a8>
attr(,"colorType")
[1] "numeric"
attr(,"colorArgs")
attr(,"colorArgs")$na.color
[1] "#808080"
# Draw each monitoring site once per year rather than once per daily reading:
# the daily table repeats a site's coordinates for every observation.
site_locations <- unique(
  comb_airpoll[!is.na(year), .(year, SITE_LATITUDE, SITE_LONGITUDE)]
)

map <- leaflet(data = site_locations) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addCircles(
    lat = ~SITE_LATITUDE,
    lng = ~SITE_LONGITUDE,
    # The label is a calendar year, not a percentage, so no "%" suffix.
    label = ~as.character(year),
    color = ~year.pal(year),
    opacity = 1,
    fillOpacity = 1,
    radius = 5
  ) %>%
  addLegend(
    position = "bottomright",
    colors = c("blue", "red"),
    labels = c("2002", "2022"),
    title = "Years",
    opacity = 0.7
  )
map

Within the state of California, the 2022 monitoring sites are spread all over the state, while in 2002 there were far fewer sites recording data. The 2002 sites appear to be only a few, scattered around the state.

#4

# Count the readings whose PM2.5 concentration is missing.
missing_values <- comb_airpoll[, sum(is.na(`Daily Mean PM2.5 Concentration`))]
missing_values
[1] 0
library(ggplot2)

# Plot every daily reading against its calendar date, one panel per year.
# With `year` on the x-axis all readings fall on just two x positions, so a
# line geom only draws one vertical stroke per year and shows no time pattern.
ggplot(comb_airpoll, aes(x = newdate, y = `Daily Mean PM2.5 Concentration`)) +
  geom_point(alpha = 0.2, size = 0.5) +
  # Free x scales keep each panel zoomed to its own year.
  facet_wrap(~year, scales = "free_x") +
  labs(
    x = "Date",
    y = "Daily Mean PM2.5 Concentration (µg/m³ LC)"
  ) +
  ggtitle("Temporal Patterns of PM2.5 Concentrations")

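Looking at the daily mean concentration, the range of values is much greater in 2022 (maximum 302.5 µg/m³ versus 104.3 µg/m³ in 2002). However, the summary statistics above show that the average daily concentration is lower in 2022 (8.58 µg/m³) than in 2002 (16.12 µg/m³), so the typical concentration decreased rather than increased.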

#5

# Daily readings restricted to the two study years. No grouping is attached
# here: group_by() without a following summarise() does not change the rows.
state_data <- comb_airpoll %>%
  filter(year %in% c(2002, 2022))

# Distribution of the daily readings per state, split by year.
ggplot(
  state_data,
  aes(x = STATE, y = `Daily Mean PM2.5 Concentration`, fill = factor(year))
) +
  geom_boxplot() +
  labs(
    x = "State",
    y = "Daily Mean PM2.5 Concentration (µg/m³)",
    fill = "Year"
  ) +
  ggtitle("Distribution of Daily Mean PM2.5 Concentrations by State (2002 vs. 2022)")

# Average the daily readings per state and year so the trend plot shows the
# mean named in its axis label: one point per state and year, joined by a line.
state_means <- state_data %>%
  group_by(STATE, year) %>%
  summarise(
    mean_pm25 = mean(`Daily Mean PM2.5 Concentration`, na.rm = TRUE),
    .groups = "drop"
  )

ggplot(
  state_means,
  aes(x = year, y = mean_pm25, group = STATE, color = STATE)
) +
  geom_line() +
  geom_point() +
  # Only two years are compared, so label exactly those on the axis.
  scale_x_continuous(breaks = c(2002, 2022)) +
  labs(
    x = "Year",
    y = "Mean PM2.5 Concentration (µg/m³)",
    title = "Trend in Daily Mean PM2.5 Concentrations by State (2002 vs. 2022)"
  )

In the state of California (the only state in the data), the daily mean PM2.5 concentrations show a wider variation in 2022 than in 2002, while the mean is smaller in 2022 than in 2002.

# ggplot2 does not use dplyr groupings, so the county plots only need the
# plain (ungrouped) daily readings for the two study years.
county_data <- state_data %>%
  ungroup()

# Per-county distribution of daily readings, split by year.
ggplot(
  county_data,
  aes(x = COUNTY, y = `Daily Mean PM2.5 Concentration`, fill = factor(year))
) +
  geom_boxplot() +
  # Counties go on the vertical axis so their names do not overlap.
  coord_flip() +
  labs(
    x = "County",
    y = "Daily Mean PM2.5 Concentration (µg/m³)",
    fill = "Year",
    title = "Boxplot of Daily Mean PM2.5 Concentrations by County (2002 vs. 2022)"
  ) +
  scale_fill_manual(values = c("blue", "red"))

# Overlay (rather than stack) the two years so their shapes can be compared;
# `bins` is set explicitly to the value ggplot2 would otherwise pick by default.
ggplot(
  county_data,
  aes(x = `Daily Mean PM2.5 Concentration`, fill = factor(year))
) +
  geom_histogram(bins = 30, position = "identity", alpha = 0.6) +
  labs(
    x = "Daily Mean PM2.5 Concentration (µg/m³)",
    y = "Count",
    fill = "Year",
    title = "Histogram of Daily Mean PM2.5 Concentrations by County (2002 vs. 2022)"
  ) +
  scale_fill_manual(values = c("blue", "red"))
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

For counties, overall it appears that both the variation and the mean of the daily mean PM2.5 concentration decreased significantly in 2022 compared to 2002.

# Daily readings recorded in Los Angeles County (both years).
la_county_data <- comb_airpoll %>%
  filter(COUNTY == "Los Angeles")

# The site plots use the same rows; group_by() without summarise() would not
# change them, and ggplot2 does not use dplyr groupings.
site_data <- la_county_data

# Per-site distribution of daily readings, split by year.
ggplot(
  site_data,
  aes(x = `Site Name`, y = `Daily Mean PM2.5 Concentration`, fill = factor(year))
) +
  geom_boxplot() +
  # Site names go on the vertical axis so they do not overlap.
  coord_flip() +
  labs(
    x = "Site Name",
    y = "Daily Mean PM2.5 Concentration (µg/m³)",
    fill = "Year",
    title = "Boxplot of Mean PM2.5 Concentrations by Site (2002 vs. 2022)"
  ) +
  scale_fill_manual(values = c("blue", "red"))

# Overlay (rather than stack) the two years so their shapes can be compared;
# `bins` is set explicitly to the value ggplot2 would otherwise pick by default.
ggplot(
  site_data,
  aes(x = `Daily Mean PM2.5 Concentration`, fill = factor(year))
) +
  geom_histogram(bins = 30, position = "identity", alpha = 0.6) +
  labs(
    x = "Daily Mean PM2.5 Concentration (µg/m³)",
    y = "Count",
    fill = "Year",
    title = "Histogram of Daily Mean PM2.5 Concentrations by Site (2002 vs. 2022)"
  ) +
  scale_fill_manual(values = c("blue", "red"))
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

For sites in Los Angeles, it appears that the same trend seen for the counties and the state is happening. Overall, it seems that the sites recorded lower concentrations in 2022 compared to 2002. But in terms of the daily mean in general, there is a lot more to observe at the site level than at the county and state levels.